#! /bin/sh
# PCP QA Test No. 041
# Test $PCP_RC_DIR pmcd and pmlogger start
#
# Copyright (c) 1995-2002 Silicon Graphics, Inc.  All Rights Reserved.
#

# name this test after the script file and announce it
seq=$(basename $0)
echo "QA output created by $seq"

# load the shared QA definitions (product settings, filters, checks)
. ./common.product
. ./common.filter
. ./common.check

# the test relies on sending a stop signal to pmcd, which is not
# possible on the mingw (Win32) platform
if [ $PCP_PLATFORM = mingw ]
then
    _notrun "Process stop signal not available on Win32"
fi

# sed stage of _filter: reads stdin, writes stdout.  In order, it
#   - drops lines that are empty or contain only spaces
#   - collapses a " -t N " option (N numeric) to a single space
#   - drops lines containing UID, UNAME or USER (presumably ps header
#     lines -- confirm against the ps output on each platform)
#   - rewrites host "name" to host SOMEHOST
#   - rewrites a leading "root ..." or "pcp ..." prefix, up to and
#     including the first "/", to "    USERNAME ... /"
#   - truncates the line after "pmcd" when "pmcd -..." options follow
#   - drops "__pmGetPDU: ... illegal PDU len=0" lines
_filter_local_noise()
{
    sed \
	-e '/^ *$/d' \
	-e 's/ -t [0-9][0-9]* / /' \
	-e '/UID/d' \
	-e '/UNAME/d' \
	-e '/USER/d' \
	-e '/host/s/host "[^"]*"/host SOMEHOST/' \
	-e 's/^ *root[^/]*\//    USERNAME ... \//' \
	-e 's/^ *pcp[^/]*\//    USERNAME ... \//' \
	-e 's/pmcd -.*/pmcd/' \
	-e '/__pmGetPDU:.*illegal PDU len=0/d'
}

# Main output filter: stdin -> stdout.
# The unfiltered input is first appended to $seq_full, then passed
# through _filter_pmcd_log, the local sed clean-up above, and finally
# _filter_pcp_start.
_filter()
{
    tee -a $seq_full | _filter_pmcd_log | _filter_local_noise | _filter_pcp_start
}

# Announce what the output that follows is expected to show.
# Writes a blank line, "### Expect: <all arguments> ###" and another
# blank line to stdout, and appends the same three lines to $seq_full.
_expect()
{
    {
	echo ""
	echo "### Expect: $* ###"
	echo ""
    } | tee -a $seq_full
}

# Exit/signal handler, installed by the trap further down.
#
# It is defined BEFORE the trap is installed and before
# _stop_auto_restart runs, so that an early exit or signal during that
# step finds a real handler rather than an undefined command.
#
# On the first call (while $_needclean is true) it:
#   - copies the saved pmlogger control file back, if $tmp.control exists
#   - moves the saved control.d directory back, if $tmp.control.d exists
#   - starts pmcd and waits for it, then re-enables pmcd auto-restart
#   - starts pmlogger and waits for it
# Every call removes the $tmp.* scratch files and exits with $status.
_cleanup()
{
    if $_needclean
    then
	[ -f $tmp.control ] && \
	    $sudo cp $tmp.control $PCP_PMLOGGERCONTROL_PATH
	[ -d $tmp.control.d ] && \
	    $sudo mv $tmp.control.d $PCP_PMLOGGERCONTROL_PATH.d
	_service pmcd start 2>&1 | _filter_pcp_start
	_wait_for_pmcd
	_restore_auto_restart pmcd
	_service pmlogger start 2>&1 | _filter_pcp_start
	_wait_for_pmlogger
	# a later call (e.g. the exit trap firing after a signal trap)
	# must not repeat the restore and restart steps
	_needclean=false
    fi
    $sudo rm -f $tmp.*
    exit $status
}

# assume failure; only the very end of the test sets status=0
status=1
_needclean=true
# NOTE(review): TAG is not referenced anywhere else in this script --
# confirm whether any sourced helper still needs it
TAG=000666000magic
# directory used for the dummy primary logger's archive in the control file
LOGGING_DIR="$PCP_ARCHIVE_DIR"
trap "_cleanup" 0 1 2 3 15

_stop_auto_restart pmcd

# real QA test starts here

# disable all pmloggers ...
# Keep a copy of the pmlogger control file and move the control.d
# directory aside under $tmp (_cleanup puts both back), then install a
# minimal control file in their place.
$sudo cp $PCP_PMLOGGERCONTROL_PATH $tmp.control
$sudo mv $PCP_PMLOGGERCONTROL_PATH.d $tmp.control.d
# The here-document delimiter is unquoted, so $seq, `date` and
# $LOGGING_DIR are expanded when the file is written; \$version is
# escaped so it stays literal.
cat <<End-of-File >$tmp.tmp
# dummy file created by qa/$seq on `date`
# the goal here is to have a controlled primary logger that does
# not make requests to pmcd!
\$version=1.1
LOCALHOSTNAME y n $LOGGING_DIR/LOCALHOSTNAME -c /dev/null
End-of-File
$sudo cp $tmp.tmp $PCP_PMLOGGERCONTROL_PATH

# Stop one service and report on it.
#   $1 - service name handed to "_service ... stop"
#   $2 - command that waits for that service to finish
# The test is abandoned via "_exit 1" if either the stop or the wait
# fails.  Otherwise the raw stdout of the stop is appended to $seq_full,
# its _filter_pcp_stop'd form is written to stdout, and stderr is shown
# unfiltered.
_stop_and_show()
{
    _service $1 stop >$tmp.out 2>$tmp.err || _exit 1
    $2 || _exit 1
    cat $tmp.out >>$seq_full
    _filter_pcp_stop <$tmp.out
    cat $tmp.err
}

# No known reason why this explicit stop is needed, but when this test
# is run after 040 it sometimes produces garbled messages without it.
# pmlogger is stopped first, then pmcd.
_stop_and_show pmlogger _wait_pmlogger_end
_stop_and_show pmcd _wait_pmcd_end

# give pmcd's socket teardown time to finish
#
sleep 2

rm -f $tmp.out $tmp.err
# with pmcd stopped, a fetch must fail to connect
_expect "connection refused"
pminfo -f sample.long.million 2>&1 | _filter

# Restart both services through the _service helper.  All of the output
# is kept in $tmp.one; it is filtered and shown further down, after the
# second restart.  Give up if either service does not come up.
( _service pmcd restart; _service pmlogger restart ) >$tmp.one 2>&1
_wait_for_pmcd || _exit 1
_wait_for_pmlogger || _exit 1

_expect "one million retrieved OK from sample.long.million"
pminfo -f sample.long.million

sleep 3		# make sure pmlogger is not clobbered

# Find pmcd's PID, which the stop-signal step below needs.  Without it,
# dump the process list and the pmcd logs into the test output and quit.
pid=`_get_pids_by_name pmcd`
if [ -z "$pid" ]
then
    echo "Arrgh ... cannot find PID for pmcd!"
    ps $PCP_PS_ALL_FLAGS
    echo "=== pmcd.log ==="
    cat $PCP_LOG_DIR/pmcd/pmcd.log
    echo "=== pmcd.log.prev ==="
    cat $PCP_LOG_DIR/pmcd/pmcd.log.prev
    # status is still 1 here, so _cleanup (run by the exit trap) exits
    # with failure
    exit
fi
# record the PID and the matching ps lines in the full log only
echo "pmcd pid=$pid" >>$seq_full
ps $PCP_PS_ALL_FLAGS | grep -E "PID|$pid" >>$seq_full

# suspend pmcd: the process still exists but no longer services requests
$sudo kill -STOP $pid

sleep 3		# make sure pmcd gets the SIGSTOP

# this will hang, and then see a timeout failure when PMCD is
# killed off
$sudo rm -f $tmp.tmp
# NOTE(review): units of PMCD_CONNECT_TIMEOUT are not shown here
# (presumably seconds) -- the intent is a connect timeout long enough
# that pminfo is still waiting when pmcd is restarted below.  The
# variable stays exported for the rest of the script.
PMCD_CONNECT_TIMEOUT=1000
export PMCD_CONNECT_TIMEOUT
# run in the background; its output is collected in $tmp.tmp and shown
# after the restart
pminfo -f sample.long.million >$tmp.tmp 2>&1 &

_expect "Forcing termination"
ps $PCP_PS_ALL_FLAGS | grep -E "PID|$pid" >>$seq_full
# This restart invokes the rc scripts in $PCP_RC_DIR directly, unlike the
# earlier restart that went through _service.  Output goes to $tmp.two.
( $sudo $PCP_RC_DIR/pmcd restart; $sudo $PCP_RC_DIR/pmlogger restart ) >$tmp.two 2>&1
_wait_for_pmcd || _exit 1
_wait_for_pmlogger || _exit 1
# wait for the background pminfo to finish before reading $tmp.tmp
wait

# show the first (_service) restart's output, then this restart's output
_filter_pcp_start <$tmp.one | _filter
_filter <$tmp.two
# NOTE(review): $tmp.out and $tmp.err were already removed earlier and
# are not recreated in between, so this looks redundant but harmless
rm -f $tmp.out $tmp.err

_expect "Timeout waiting for a response from PMCD"
_filter <$tmp.tmp
rm -f $tmp.tmp

sleep 3		# don't ask, I don't understand either (kenj)

# with pmcd restarted, the fetch should succeed again
_expect "one million again"
pminfo -f sample.long.million

# all OK
# (the bare exit runs _cleanup via the trap on 0, which exits with $status)
status=0
exit
